home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
EnigmA Amiga Run 1996 February
/
EnigmA AMIGA RUN 04 (1996)(G.R. Edizioni)(IT)[!][issue 1996-02][Skylink CD III].iso
/
earcd
/
gnu
/
recode33.lha
/
recode-3.3
/
charname.pl
< prev
next >
Wrap
Text File
|
1993-12-19
|
5KB
|
197 lines
# Automatically derive charname.h from rfc1345.txt.
# Copyright (C) 1993 Free Software Foundation, Inc.
# Francois Pinard <pinard@iro.umontreal.ca>, 1993.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# Create the generated header and write its fixed preamble.  Everything
# that follows in this script appends to the HDR handle opened here.
open (HDR, '>', 'charname.h')
    or die "Cannot create charname.h: $!\n";

# The here-document is single-quoted on purpose: the text holds an e-mail
# address, and an interpolating here-document would expand `@iro' as an
# (empty) array, silently mangling the address in the generated file.
print HDR <<'END_OF_TEXT';
/* DO NOT MODIFY THIS FILE! It was generated by "charname.pl". */
/* Conversion of files between different charsets and usages.
Copyright (C) 1990, 1993 Free Software Foundation, Inc.
Francois Pinard <pinard@iro.umontreal.ca>, 1993.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
END_OF_TEXT
# Read the character comments. Count words in charnames.
#
# Input is taken from the files named on the command line, or from
# standard input.  A line holding a symbol, a lower-case hexadecimal
# number and a name is recorded as follows: the name is stored in
# %charname under the symbol, the longest name length is tracked in
# $max_length, and each word of the name is tallied in %code.

print STDERR "Reading...";

$_ = <>;
while (defined $_)
{
    # chomp only removes a trailing newline, so a final line lacking one
    # keeps its last character (chop would have eaten it).
    chomp;

    # Look ahead one line and merge it if it should.
    my $next = <>;
    if (defined $next && $next =~ /^ ( .*)/)
    {
        $_ .= $1;
        $next = <>;
    }

    # Separate fields and save needed information.
    if (/([^ ]+) +[0-9a-f]+ +(.*)/)
    {
        my ($symbol, $name) = ($1, $2);

        $charname{$symbol} = $name;
        $max_length = length ($name) if length ($name) > $max_length;

        # Split on runs of whitespace, exactly as the pass that later
        # encodes the names does.  Splitting on single spaces would count
        # an empty word for every doubled space and would miss words
        # separated by tabs, leaving them without any code.
        foreach my $word (split (' ', $name))
        {
            $code{$word}++;
        }
    }
    elsif (!/ +e000/)
    {
        print "What about <<", $_, ">>?\n";
    }

    # Prepare for next line.
    $_ = $next;
}
# Establish a mild compression scheme. Words $word[0] to
# $word[$singles-1] will be represented by a single byte running from
# 1 to $singles. All remaining words will be represented by two
# bytes, the first one running slowly from $singles+1 to 255, the
# second cycling faster from 1 to 255.

print STDERR "Sorting words...";

# Most frequent words first, so that they receive the one-byte codes.
@word = sort descending keys %code;
$count = 0 + @word;

# With no one-byte codes at all, two bytes ranging over 1..255 each can
# name at most 255 * 255 words; beyond that no valid table exists.
die "Too many distinct words ($count) for the one or two byte encoding\n"
    if $count > 255 * 255;

# Largest number of one-byte codes that still leaves enough two-byte
# codes for all remaining words.
$singles = int ((255 * 255 - $count) / 254);

# For a small vocabulary the formula yields more one-byte codes than
# there are words; cap it so the word table is never written past its
# declared $count entries.
$singles = $count if $singles > $count;

# Transmit a few values for further usage by the C code.

print STDERR "and charnames...";
@symbol = sort keys %charname;

printf HDR "\n#define NUMBER_OF_SINGLES %d\n", $singles;
printf HDR "\n#define MAX_CHARNAME_LENGTH %d\n", $max_length;
printf HDR "\n#define NUMBER_OF_CHARNAMES %d\n", (0 + @symbol);
# Establish a mild compression scheme (one or two bytes per word).
#
# Write the C table of lower-cased words, each followed by a comment
# giving its code in octal.  As a side effect, the frequency stored in
# %code for each word is replaced by the code assigned to that word: the
# plain byte value for one-byte codes, 256 * first + second for two-byte
# codes.

print STDERR "Writing words...";

print HDR "\n",
          "static const char *const word[$count] =\n",
          " {\n";

my $index = 0;
my ($lead, $trail) = (1, 1);

# First the words that get a one-byte code.
while ($index < $singles)
{
    (my $lowered = $word[$index]) =~ tr/A-Z/a-z/;
    printf HDR " %-28s/* %0.3o */\n", "\"$lowered\",", $lead;
    $code{$word[$index]} = $lead;
    $lead++;
    $index++;
}

# Then every remaining word, with a two-byte code.
while ($index < $count)
{
    (my $lowered = $word[$index]) =~ tr/A-Z/a-z/;
    printf HDR " %-28s/* %0.3o %0.3o */\n", "\"$lowered\",", $lead, $trail;
    $code{$word[$index]} = 256 * $lead + $trail;

    # The second byte cycles through 1..255; the first byte advances
    # each time the second one wraps around.
    if ($trail < 255)
    {
        $trail++;
    }
    else
    {
        $lead++;
        $trail = 1;
    }
    $index++;
}

print HDR " };\n";
# Print compressed charnames for all characters.
#
# Write the declaration of `struct charname', then one initializer per
# symbol of @symbol: the symbol as a C string literal, followed by its
# name in which every word is replaced by the code held in %code,
# written as three-digit octal escapes.  Finally close the header and
# leave the script.

print STDERR "and charnames...";

print HDR "\n";
print HDR "struct charname\n";
print HDR " {\n";
print HDR " const char *symbol;\n";
print HDR " const char *crypted;\n";
print HDR " };\n";
print HDR "\n";
print HDR "static const struct charname charname[NUMBER_OF_CHARNAMES] =\n";
print HDR " {\n";

foreach my $symbol (@symbol)
{
    # Protect the characters that are special inside a C string literal:
    # the double quote, and the backslash, which would otherwise start
    # an escape sequence of its own.
    (my $string = $symbol) =~ s/(["\\])/\\$1/g;
    print HDR " {\"$string\", \"";

    foreach my $word (split (' ', $charname{$symbol}))
    {
        my $packed = $code{$word};
        if ($packed < 256)
        {
            # One-byte code.
            printf HDR "\\%0.3o", $packed;
        }
        else
        {
            # Two-byte code, stored as 256 * first + second.
            printf HDR "\\%0.3o\\%0.3o", int ($packed / 256), $packed % 256;
        }
    }

    print HDR "\"},\n";
}

print HDR " };\n";

# Buffered write errors (a full disk, say) only show up when the handle
# is closed, so do not announce success before the close has worked.
close (HDR)
    or die "Cannot finish writing charname.h: $!\n";
print STDERR "done\n";

exit 0;
# Comparison routine for descending frequency sort.
#
# Meant to be used as a `sort' comparator: it reads the two items from
# $a and $b and their counts from %code.  The item with the higher count
# sorts first; items with equal counts are ordered as strings, so the
# resulting order is fully determined.

sub descending
{
    my $difference = $code{$b} - $code{$a};

    return $difference if $difference != 0;
    return $a cmp $b;
}